Setup

library(tidyverse)
library(lubridate)
library(leaflet)
incidents <- feather::read_feather("incidents.feather")
drivers <- feather::read_feather("drivers.feather")
non_motorists <- feather::read_feather("non_motorists.feather")
single_color <- "#3182bd"
basic_theme <- theme(
  plot.title = element_text(size = 24, vjust = .5, hjust = .5),
  axis.title = element_text(size = 16, hjust = .5, vjust = .5),
  panel.background = element_rect(fill = "white"),
  legend.position = "none",
  panel.grid.major.y = element_line(color = "grey75", size = .1, linetype = "solid")
)

Incidents

This is a quick overview of the structure of the Incidents table.

skimr::skim_to_wide(incidents) %>% DT::datatable()

There have been 142 fatal crashes out of 56546 recorded incidents since this data set began in 2015.

incidents %>%
  group_by(acrs_report_type) %>%
  count() %>%
  knitr::kable()
acrs_report_type n
Fatal Crash 142
Injury Crash 20237
Property Damage Crash 36167

Location of Incidents

This interactive map shows the locations of the incidents in the data set. The 105 incidents that do not fall within the limits of the county are colored red.

pal <- colorFactor(c("blue", "red"), domain = c(0, 1))
incidents %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(lng = ~longitude, lat = ~latitude, radius = 3, stroke = TRUE, weight = 2, opacity = 1, color = ~ pal(not_in_county))

Time variables

Crashes peak around 5:00 PM, which lines up with the evening rush hour. It seems that the report times are often rounded to the nearest fifteen or five minute round number.

incidents %>%
  select(incident_hour, incident_minute) %>%
  mutate(incident_time = 60 * incident_hour + incident_minute) %>%
  ggplot() +
  stat_count(aes(x = incident_time), fill = single_color) +
  scale_x_continuous(
    name = "Time",
    breaks = seq.int(from = 0, to = 24 * 60, by = 120),
    labels = c(paste0(seq.int(from = 0, to = 22, by = 2), ":00"), "0:00")
  ) +
  ggtitle("Crashes by Time of Day") +
  basic_theme

Crashes are slightly less frequent on the weekends than on weekdays.

incidents %>%
  select(incident_weekday) %>%
  ggplot() +
  stat_count(aes(x = incident_weekday), fill = single_color) +
  scale_x_continuous(
    name = "Day of Week",
    breaks = 1:7,
    labels = c("Sunday", "Monday", "Tuesday", "Wednesday", "Thurday", "Friday", "Saturday")
  ) +
  ggtitle("Crashes by Day of Week") +
  basic_theme

The number of crashes seems to be fairly constant over time.

incidents %>%
  select(incident_year) %>%
  ggplot() +
  stat_count(aes(x = incident_year), fill = single_color) +
  ggtitle("Crashes by Year") +
  basic_theme

This table contains the number of crashes recorded on each date. 2019 is excluded because the year hasn’t concluded and the counts for dates that already have happened would be inflated. The dates with the most crashes tend to come from the last four months of the year, but holidays including Christmas and New Years are among the days with the fewest crashes.

incidents %>%
  filter(incident_year < 2019) %>%
  select(incident_month, incident_day) %>%
  mutate(incident_date = paste0(incident_month, "/", incident_day)) %>%
  count(incident_date) %>%
  arrange(desc(n)) %>%
  rename(Date = incident_date, `Number of Crashes` = n) %>%
  DT::datatable()

Weather

Most crashes come under clear weather conditions.

incidents %>%
  group_by(weather) %>%
  count() %>%
  arrange(desc(n)) %>%
  knitr::kable()
weather n
CLEAR 37223
RAINING 6893
CLOUDY 5902
N/A 4764
SNOW 675
UNKNOWN 349
FOGGY 228
WINTRY MIX 172
OTHER 138
SLEET 77
SEVERE WINDS 64
BLOWING SNOW 54
BLOWING SAND SOIL DIRT 7

Substance Abuse

Crashes involving alcohol and illegal drugs constitute a small minority of the total incidents.

incidents %>%
  mutate(alcohol = driver_substance_abuse_alcohol_present | driver_substance_abuse_alcohol_contributed) %>%
  count(alcohol) %>%
  knitr::kable()
alcohol n
FALSE 53354
TRUE 3192
incidents %>%
  count(driver_substance_abuse_alcohol_contributed) %>%
  knitr::kable()
driver_substance_abuse_alcohol_contributed n
0 55683
1 863
incidents %>%
  count(driver_substance_abuse_alcohol_present) %>%
  knitr::kable()
driver_substance_abuse_alcohol_present n
0 54214
1 2332
incidents %>%
  mutate(illegal_drug = driver_substance_abuse_illegal_drug_present | driver_substance_abuse_illegal_drug_contributed) %>%
  count(illegal_drug) %>%
  knitr::kable()
illegal_drug n
FALSE 56329
TRUE 217
incidents %>%
  count(driver_substance_abuse_illegal_drug_contributed) %>%
  knitr::kable()
driver_substance_abuse_illegal_drug_contributed n
0 56501
1 45
incidents %>%
  count(driver_substance_abuse_illegal_drug_present) %>%
  knitr::kable()
driver_substance_abuse_illegal_drug_present n
0 56374
1 172

Alcohol and Time

Crashes involving alcohol do not follow the general time of day trends of crashes as a whole. The late night and early morning hours see more crashes than the daylight hours.

incidents %>%
  filter(driver_substance_abuse_alcohol_present | driver_substance_abuse_alcohol_contributed) %>%
  select(incident_hour, incident_minute) %>%
  mutate(incident_time = 60 * incident_hour + incident_minute) %>%
  ggplot() +
  stat_count(aes(x = incident_time), fill = single_color) +
  scale_x_continuous(
    name = "Time",
    breaks = seq.int(from = 0, to = 24 * 60, by = 120),
    labels = paste0(seq.int(from = 0, to = 24, by = 2), ":00")
  ) +
  ggtitle("Crashes Involving Alcohol by Time of Day") +
  basic_theme

Weekends see more alchol related crashes than weekdays, the opposite of the trend for all crashes.

incidents %>%
  filter(driver_substance_abuse_alcohol_present | driver_substance_abuse_alcohol_contributed) %>%
  select(incident_weekday) %>%
  ggplot() +
  stat_count(aes(x = incident_weekday), fill = single_color) +
  scale_x_continuous(
    name = "Day of Week",
    breaks = 1:7,
    labels = c("Sunday", "Monday", "Tuesday", "Wednesday", "Thurday", "Friday", "Saturday")
  ) +
  ggtitle("Crashes Involving Alcohol by Day of Week") +
  basic_theme

Drivers

Overview of the drivers table structure.

skimr::skim_to_wide(drivers) %>% DT::datatable()

Driver Injuries

81.31% of drivers did not sustain any reported injuries. 0.92% of Drivers suffered serious or fatal injuries.

ordered_levels <- drivers %>%
  count(injury_severity) %>%
  arrange(desc(n)) %>%
  pull(injury_severity)
drivers %>%
  mutate(`Injury Severity` = factor(injury_severity, ordered_levels)) %>%
  ggplot() +
  stat_count(aes(x = `Injury Severity`), fill = single_color) +
  ggtitle("Severity of Driver Injuries") +
  basic_theme

drivers %>%
  count(injury_severity) %>%
  arrange(n) %>%
  knitr::kable()
injury_severity n
FATAL INJURY 70
SUSPECTED SERIOUS INJURY 861
SUSPECTED MINOR INJURY 7580
POSSIBLE INJURY 10330
NO APPARENT INJURY 81993

Vehichle Damage

Most reported accidents involved at least some vehicle damage. Just 3.74% of drivers reported no vehicle damage.

ordered_levels <- drivers %>%
  count(vehicle_damage_extent) %>%
  arrange(desc(n)) %>%
  pull(vehicle_damage_extent)
drivers %>%
  mutate(`Vehicle Damage` = factor(vehicle_damage_extent, ordered_levels)) %>%
  ggplot() +
  stat_count(aes(x = `Vehicle Damage`), fill = single_color) +
  ggtitle("Severity of Vehicle Damage") +
  basic_theme

drivers %>%
  count(vehicle_damage_extent) %>%
  arrange(desc(n)) %>%
  knitr::kable()
vehicle_damage_extent n
DISABLING 35708
FUNCTIONAL 27284
SUPERFICIAL 26431
DESTROYED 3951
NO DAMAGE 3777
UNKNOWN 3441
N/A 185
OTHER 57

Speed Limits

drivers %>%
  ggplot() +
  stat_count(aes(x = speed_limit, fill = ), fill = single_color) +
  ggtitle("Speed Limits") +
  basic_theme

ordered_injury <- drivers %>%
  count(injury_severity) %>%
  arrange(desc(n)) %>%
  pull(injury_severity)
drivers %>%
  mutate(
    `Speed Limit` = factor(c("LOW", "MEDIUM", "HIGH")[1 + findInterval(speed_limit, c(31, 46))], levels = c("LOW", "MEDIUM", "HIGH")),
    `Injury Severity` = factor(injury_severity, ordered_injury)
  ) %>%
  ggplot() +
  stat_count(aes(x = `Injury Severity`, fill = `Speed Limit`)) +
  basic_theme +
  theme(legend.position = c(.9, .9)) +
  ggtitle("Injuries and Speed Limits")

Types of Vehicles

ordered_levels <- drivers %>%
  count(vehicle_body_type) %>%
  arrange(desc(n)) %>%
  pull(vehicle_body_type)
drivers %>%
  mutate(`Body Type` = factor(as.character(fct_collapse(vehicle_body_type, OTHER = ordered_levels[-(1:5)])), c(ordered_levels[1:5], "OTHER"))) %>%
  ggplot() +
  stat_count(aes(x = `Body Type`), fill = single_color) +
  ggtitle("Vehicle Body Types") +
  basic_theme

Non-Motorists

Overview of Non-Motorists table structure

skimr::skim_to_wide(non_motorists) %>% DT::datatable()

Types of Non Motorists

Pedestrians are the most common type of non motorist, followed by bicyclists.

ordered_levels <- non_motorists %>%
  count(pedestrian_type) %>%
  arrange(desc(n)) %>%
  pull(pedestrian_type)
non_motorists %>%
  mutate(pedestrian_type = factor(pedestrian_type, ordered_levels)) %>%
  ggplot() +
  stat_count(aes(x = pedestrian_type), fill = single_color) +
  basic_theme +
  ggtitle("Types of Non-Motorists")

Locations of Incidents Involving Non-Motorists

pal <- colorFactor(c("blue", "red"), domain = c(0, 1))
non_motorists %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(lng = ~longitude, lat = ~latitude, radius = 3, stroke = TRUE, weight = 2, opacity = 1, color = ~ pal(not_in_county))

Injuries

8.58% of non-motorists did not sustain any reported injuries. 13.09% of non-motorists suffered serious or fatal injuries.

ordered_levels <- non_motorists %>%
  count(injury_severity) %>%
  arrange(desc(n)) %>%
  pull(injury_severity)
non_motorists %>%
  mutate(`Injury Severity` = factor(injury_severity, ordered_levels)) %>%
  ggplot() +
  stat_count(aes(x = `Injury Severity`), fill = single_color) +
  ggtitle("Severity of Non Motorist Injuries") +
  basic_theme

non_motorists %>%
  count(injury_severity) %>%
  arrange(n) %>%
  knitr::kable()
injury_severity n
FATAL INJURY 63
NO APPARENT INJURY 274
SUSPECTED SERIOUS INJURY 355
POSSIBLE INJURY 1065
SUSPECTED MINOR INJURY 1436

Bicycle Helmet Usage

ordered_levels <- non_motorists %>%
  filter(pedestrian_type == "BICYCLIST") %>%
  count(safety_equipment) %>%
  arrange(desc(n)) %>%
  pull(safety_equipment)
non_motorists %>%
  filter(pedestrian_type == "BICYCLIST") %>%
  mutate(`Safety Equipment` = factor(safety_equipment, ordered_levels)) %>%
  ggplot() +
  stat_count(aes(x = `Safety Equipment`), fill = single_color) +
  basic_theme +
  ggtitle("What Kinds of Safety Equiptment Do Bicyclists Use?")

The more severe injuries tend to have lower rates of helmet usage than minor injuries.

ordered_safety <- non_motorists %>%
  filter(pedestrian_type == "BICYCLIST") %>%
  count(safety_equipment) %>%
  arrange(desc(n)) %>%
  pull(safety_equipment)
ordered_injury <- non_motorists %>%
  filter(pedestrian_type == "BICYCLIST") %>%
  count(injury_severity) %>%
  arrange(desc(n)) %>%
  pull(injury_severity)
non_motorists %>%
  filter(pedestrian_type == "BICYCLIST") %>%
  mutate(
    `Safety Equipment` = fct_collapse(safety_equipment, OTHER = ordered_safety[-(1:2)]),
    `Injury Severity` = factor(injury_severity, ordered_injury)
  ) %>%
  ggplot() +
  stat_count(aes(x = `Injury Severity`, fill = `Safety Equipment`)) +
  basic_theme +
  theme(legend.position = c(.9, .9)) +
  ggtitle("Injuries and Helmets")

Consistency Check

fatal_drivers <- drivers %>%
  filter(injury_severity_fatal_injury == 1) %>%
  pull(report_number)
fatal_nonm <- non_motorists %>%
  filter(injury_severity_fatal_injury == 1) %>%
  pull(report_number)
fatal_acrs <- incidents %>%
  filter(acrs_report_type_fatal_crash == 1) %>%
  pull(report_number)
recorded_fatal <- c(fatal_drivers, fatal_nonm)
# vector of fatal reports with no recorded fatality
probs <- fatal_acrs %>% .[!. %in% recorded_fatal]
incidents %>%
  filter(report_number %in% probs) %>%
  DT::datatable(options = list(pageLength = 5))
drivers %>%
  filter(report_number %in% probs) %>%
  DT::datatable(options = list(pageLength = 5))
non_motorists %>%
  filter(report_number %in% probs) %>%
  DT::datatable(options = list(pageLength = 5))
drivers %>%
  group_by(report_number) %>%
  tally() %>%
  arrange(desc(n))
## # A tibble: 56,167 x 2
##    report_number     n
##    <chr>         <int>
##  1 MCP12130045       9
##  2 MCP2667000H       8
##  3 MCP1227000M       7
##  4 MCP15800085       7
##  5 MCP23580027       7
##  6 MCP2513001C       7
##  7 MCP2617006L       7
##  8 MCP3140000Y       7
##  9 MCP9130001S       7
## 10 MCP9422000Z       7
## # ... with 56,157 more rows
drivers %>%
  count(report_number) %>%
  count(n) %>%
  rename(`Vehicles Involved` = n) %>%
  knitr::kable()
Vehicles Involved nn
1 17435
2 33810
3 4089
4 693
5 113
6 17
7 8
8 1
9 1
non_motorists %>%
  group_by(report_number) %>%
  tally() %>%
  arrange(desc(n))
## # A tibble: 3,034 x 2
##    report_number     n
##    <chr>         <int>
##  1 MCP229800LT       4
##  2 MCP2546002K       4
##  3 DD5603004V        3
##  4 DM8445001H        3
##  5 DM8457000R        3
##  6 DM8463000L        3
##  7 EJ7809000W        3
##  8 EJ7833003W        3
##  9 MCP1438002S       3
## 10 MCP20080044       3
## # ... with 3,024 more rows